{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Note: you may need to restart the kernel to use updated packages.\n"
          ]
        }
      ],
      "source": [
        "%pip install -q bitsandbytes>=0.43.1 accelerate transformers torch sentencepiece"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "💻 CPU mode - loading without quantization...\n"
          ]
        },
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "2fa644e735144ab0a238f031bf7c6c7a",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "93cedb68e7374f7f98622d24ee02ba33",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Llama model failed to load: <ContextVar name='shell_parent' at 0x1061d0220>\n",
            "Trying alternative loading method...\n"
          ]
        },
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "17d3da1874734c7fbf542b239f6f5ba0",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Exception ignored in: <function tqdm.__del__ at 0x126b14720>\n",
            "Traceback (most recent call last):\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n",
            "    self.close()\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n",
            "    self.disp(bar_style='danger', check_delay=False)\n",
            "AttributeError: 'tqdm' object has no attribute 'disp'\n",
            "Exception ignored in: <function tqdm.__del__ at 0x126b14720>\n",
            "Traceback (most recent call last):\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n",
            "    self.close()\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n",
            "    self.disp(bar_style='danger', check_delay=False)\n",
            "AttributeError: 'tqdm' object has no attribute 'disp'\n",
            "Exception ignored in: <function tqdm.__del__ at 0x126b14720>\n",
            "Traceback (most recent call last):\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n",
            "    self.close()\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n",
            "    self.disp(bar_style='danger', check_delay=False)\n",
            "AttributeError: 'tqdm' object has no attribute 'disp'\n",
            "Exception ignored in: <function tqdm.__del__ at 0x126b14720>\n",
            "Traceback (most recent call last):\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n",
            "    self.close()\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n",
            "    self.disp(bar_style='danger', check_delay=False)\n",
            "AttributeError: 'tqdm' object has no attribute 'disp'\n",
            "Exception ignored in: <function tqdm.__del__ at 0x126b14720>\n",
            "Traceback (most recent call last):\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n",
            "    self.close()\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n",
            "    self.disp(bar_style='danger', check_delay=False)\n",
            "AttributeError: 'tqdm' object has no attribute 'disp'\n",
            "Exception ignored in: <function tqdm.__del__ at 0x126b14720>\n",
            "Traceback (most recent call last):\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n",
            "    self.close()\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n",
            "    self.disp(bar_style='danger', check_delay=False)\n",
            "AttributeError: 'tqdm' object has no attribute 'disp'\n",
            "Exception ignored in: <function tqdm.__del__ at 0x126b14720>\n",
            "Traceback (most recent call last):\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n",
            "    self.close()\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n",
            "    self.disp(bar_style='danger', check_delay=False)\n",
            "AttributeError: 'tqdm' object has no attribute 'disp'\n",
            "Exception ignored in: <function tqdm.__del__ at 0x126b14720>\n",
            "Traceback (most recent call last):\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n",
            "    self.close()\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n",
            "    self.disp(bar_style='danger', check_delay=False)\n",
            "AttributeError: 'tqdm' object has no attribute 'disp'\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Llama model completely failed: <ContextVar name='shell_parent' at 0x1061d0220>\n",
            "Will use OpenAI only mode.\n"
          ]
        }
      ],
      "source": [
        "import torch\n",
        "import pandas as pd\n",
        "import random\n",
        "from io import StringIO\n",
        "from openai import OpenAI\n",
        "import gradio as gr\n",
        "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\n",
        "from dotenv import load_dotenv\n",
        "import os\n",
        "\n",
        "load_dotenv(override=True)\n",
        "openai = OpenAI()\n",
        "\n",
        "LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n",
        "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
        "\n",
        "try:\n",
        "    tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n",
        "    tokenizer.pad_token = tokenizer.eos_token\n",
        "    \n",
        "    if torch.cuda.is_available():\n",
        "        print(\"🚀 CUDA available - loading with quantization...\")\n",
        "        quant_config = BitsAndBytesConfig(\n",
        "            load_in_4bit=True,\n",
        "            bnb_4bit_use_double_quant=True,\n",
        "            bnb_4bit_compute_dtype=torch.bfloat16,\n",
        "            bnb_4bit_quant_type=\"nf4\"\n",
        "        )\n",
        "        model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"auto\", quantization_config=quant_config)\n",
        "    else:\n",
        "        print(\"💻 CPU mode - loading without quantization...\")\n",
        "        model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"cpu\", torch_dtype=torch.float16)\n",
        "    \n",
        "    print(\"Llama model loaded successfully!\")\n",
        "except Exception as e:\n",
        "    print(f\"Llama model failed to load: {e}\")\n",
        "    print(\"Trying alternative loading method...\")\n",
        "    try:\n",
        "        tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n",
        "        tokenizer.pad_token = tokenizer.eos_token\n",
        "        model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"cpu\", torch_dtype=torch.float32)\n",
        "        print(\"Llama model loaded in CPU mode!\")\n",
        "    except Exception as e2:\n",
        "        print(f\"Llama model completely failed: {e2}\")\n",
        "        print(\"Will use OpenAI only mode.\")\n",
        "        model = None\n",
        "        tokenizer = None\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {},
      "outputs": [],
      "source": [
        "def generate_with_openai(dataset_type, num_records, region):\n",
        "    prompts = {\n",
        "        \"employees\": f\"Generate {num_records} synthetic employee records with {region} addresses. Include: employee_id, first_name, last_name, email, phone, department, salary, hire_date, address, city, state, country.\",\n",
        "        \"customers\": f\"Generate {num_records} synthetic customer records with {region} addresses. Include: customer_id, first_name, last_name, email, phone, company, address, city, state, country, registration_date.\",\n",
        "        \"products\": f\"Generate {num_records} synthetic product records. Include: product_id, name, category, price, description, brand, stock_quantity, supplier, created_date.\",\n",
        "        \"transactions\": f\"Generate {num_records} synthetic transaction records. Include: transaction_id, customer_id, product_id, amount, quantity, transaction_date, payment_method, status.\"\n",
        "    }\n",
        "    \n",
        "    response = openai.chat.completions.create(\n",
        "        model=\"gpt-4o-mini\",\n",
        "        messages=[\n",
        "            {\"role\": \"system\", \"content\": \"You are a data generation expert. Create realistic, diverse synthetic data in CSV format.\"},\n",
        "            {\"role\": \"user\", \"content\": prompts[dataset_type]}\n",
        "        ]\n",
        "    )\n",
        "    \n",
        "    return clean_csv_response(response.choices[0].message.content)\n",
        "\n",
        "def generate_with_llama(dataset_type, num_records, region):\n",
        "    if model is None or tokenizer is None:\n",
        "        return \"❌ Llama model not available. Please use OpenAI option.\"\n",
        "    \n",
        "    prompts = {\n",
        "        \"employees\": f\"Create {num_records} employee records with {region} addresses: employee_id, first_name, last_name, email, phone, department, salary, hire_date, address, city, state, country. Format as CSV.\",\n",
        "        \"customers\": f\"Create {num_records} customer records with {region} addresses: customer_id, first_name, last_name, email, phone, company, address, city, state, country, registration_date. Format as CSV.\",\n",
        "        \"products\": f\"Create {num_records} product records: product_id, name, category, price, description, brand, stock_quantity, supplier, created_date. Format as CSV.\",\n",
        "        \"transactions\": f\"Create {num_records} transaction records: transaction_id, customer_id, product_id, amount, quantity, transaction_date, payment_method, status. Format as CSV.\"\n",
        "    }\n",
        "    \n",
        "    try:\n",
        "        inputs = tokenizer(prompts[dataset_type], return_tensors=\"pt\").to(device)\n",
        "        \n",
        "        with torch.no_grad():\n",
        "            outputs = model.generate(\n",
        "                **inputs,\n",
        "                max_new_tokens=2048,\n",
        "                temperature=0.7,\n",
        "                do_sample=True,\n",
        "                pad_token_id=tokenizer.eos_token_id\n",
        "            )\n",
        "        \n",
        "        response = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
        "        return clean_csv_response(response)\n",
        "    except Exception as e:\n",
        "        return f\"❌ Error generating with Llama: {str(e)}\"\n",
        "\n",
        "def clean_csv_response(response):\n",
        "    response = response.strip()\n",
        "    if \"```\" in response:\n",
        "        response = response.split(\"```\")[1] if len(response.split(\"```\")) > 1 else response\n",
        "    return response\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {},
      "outputs": [],
      "source": [
        "def generate_dataset(dataset_type, num_records, region, model_choice):\n",
        "    try:\n",
        "        if model_choice == \"OpenAI GPT-4o-mini\":\n",
        "            csv_data = generate_with_openai(dataset_type, num_records, region)\n",
        "        else:\n",
        "            csv_data = generate_with_llama(dataset_type, num_records, region)\n",
        "        \n",
        "        df = pd.read_csv(StringIO(csv_data))\n",
        "        return df, csv_data, f\"✅ Generated {len(df)} records successfully!\"\n",
        "    except Exception as e:\n",
        "        return pd.DataFrame(), \"\", f\"❌ Error: {str(e)}\"\n",
        "\n",
        "def download_csv(csv_data):\n",
        "    return csv_data if csv_data else \"\"\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "* Running on local URL:  http://127.0.0.1:7863\n",
            "* Running on public URL: https://aaf0c65f7daaafbd21.gradio.live\n",
            "\n",
            "This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
          ]
        },
        {
          "data": {
            "text/html": [
              "<div><iframe src=\"https://aaf0c65f7daaafbd21.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
            ],
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "text/plain": []
          },
          "execution_count": 13,
          "metadata": {},
          "output_type": "execute_result"
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Traceback (most recent call last):\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/queueing.py\", line 759, in process_events\n",
            "    response = await route_utils.call_process_api(\n",
            "               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            "    ...<5 lines>...\n",
            "    )\n",
            "    ^\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/route_utils.py\", line 354, in call_process_api\n",
            "    output = await app.get_blocks().process_api(\n",
            "             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            "    ...<11 lines>...\n",
            "    )\n",
            "    ^\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/blocks.py\", line 2127, in process_api\n",
            "    data = await self.postprocess_data(block_fn, result[\"prediction\"], state)\n",
            "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/blocks.py\", line 1910, in postprocess_data\n",
            "    await processing_utils.async_move_files_to_cache(\n",
            "    ...<3 lines>...\n",
            "    )\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/processing_utils.py\", line 594, in async_move_files_to_cache\n",
            "    return await client_utils.async_traverse(\n",
            "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            "        data, _move_to_cache, client_utils.is_file_obj_with_meta\n",
            "        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            "    )\n",
            "    ^\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/gradio_client/utils.py\", line 1197, in async_traverse\n",
            "    return await func(json_obj)\n",
            "           ^^^^^^^^^^^^^^^^^^^^\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/processing_utils.py\", line 560, in _move_to_cache\n",
            "    elif utils.is_static_file(payload):\n",
            "         ~~~~~~~~~~~~~~~~~~~~^^^^^^^^^\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/utils.py\", line 1191, in is_static_file\n",
            "    return _is_static_file(file_path, _StaticFiles.all_paths)\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/utils.py\", line 1204, in _is_static_file\n",
            "    if not file_path.exists():\n",
            "           ~~~~~~~~~~~~~~~~^^\n",
            "  File \"/opt/miniconda3/lib/python3.13/pathlib/_abc.py\", line 450, in exists\n",
            "    self.stat(follow_symlinks=follow_symlinks)\n",
            "    ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            "  File \"/opt/miniconda3/lib/python3.13/pathlib/_local.py\", line 515, in stat\n",
            "    return os.stat(self, follow_symlinks=follow_symlinks)\n",
            "           ~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            "OSError: [Errno 63] File name too long: 'csv\\ntransaction_id,customer_id,product_id,amount,quantity,transaction_date,payment_method,status\\n1,CUST001,PROD1001,29.99,1,2023-01-15,Credit Card,Completed\\n2,CUST002,PROD1002,15.49,2,2023-01-18,Debit Card,Completed\\n3,CUST003,PROD1003,65.00,1,2023-02-01,PayPal,Pending\\n4,CUST001,PROD1004,10.99,3,2023-02-10,Credit Card,Completed\\n5,CUST004,PROD1005,45.50,1,2023-02-20,Cash,Completed\\n6,CUST005,PROD1006,89.99,1,2023-03-02,Debit Card,Completed\\n7,CUST002,PROD1007,24.99,2,2023-03-14,Credit Card,Cancelled\\n8,CUST003,PROD1008,12.50,4,2023-03-20,PayPal,Completed\\n9,CUST006,PROD1009,150.00,1,2023-04-01,Credit Card,Completed\\n10,CUST007,PROD1010,30.00,2,2023-04-10,Debit Card,Pending\\n11,CUST008,PROD1011,5.99,10,2023-04-12,Cash,Completed\\n12,CUST001,PROD1012,70.00,1,2023-05-05,Credit Card,Completed\\n13,CUST009,PROD1013,100.00,1,2023-05-15,PayPal,Completed\\n14,CUST004,PROD1014,45.00,1,2023-05-25,Credit Card,Returned\\n15,CUST002,PROD1015,7.50,5,2023-06-10,Debit Card,Completed\\n16,CUST005,PROD1016,22.00,3,2023-06-12,Cash,Completed\\n17,CUST006,PROD1017,120.00,1,2023-06-20,Credit Card,Pending\\n18,CUST008,PROD1018,80.00,1,2023-07-01,PayPal,Completed\\n19,CUST007,PROD1019,60.00,2,2023-07-05,Credit Card,Completed\\n20,CUST003,PROD1020,15.00,3,2023-07-15,Debit Card,Completed\\n'\n",
            "Traceback (most recent call last):\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/queueing.py\", line 759, in process_events\n",
            "    response = await route_utils.call_process_api(\n",
            "               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            "    ...<5 lines>...\n",
            "    )\n",
            "    ^\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/route_utils.py\", line 354, in call_process_api\n",
            "    output = await app.get_blocks().process_api(\n",
            "             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            "    ...<11 lines>...\n",
            "    )\n",
            "    ^\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/blocks.py\", line 2127, in process_api\n",
            "    data = await self.postprocess_data(block_fn, result[\"prediction\"], state)\n",
            "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/blocks.py\", line 1910, in postprocess_data\n",
            "    await processing_utils.async_move_files_to_cache(\n",
            "    ...<3 lines>...\n",
            "    )\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/processing_utils.py\", line 594, in async_move_files_to_cache\n",
            "    return await client_utils.async_traverse(\n",
            "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            "        data, _move_to_cache, client_utils.is_file_obj_with_meta\n",
            "        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            "    )\n",
            "    ^\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/gradio_client/utils.py\", line 1197, in async_traverse\n",
            "    return await func(json_obj)\n",
            "           ^^^^^^^^^^^^^^^^^^^^\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/processing_utils.py\", line 560, in _move_to_cache\n",
            "    elif utils.is_static_file(payload):\n",
            "         ~~~~~~~~~~~~~~~~~~~~^^^^^^^^^\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/utils.py\", line 1191, in is_static_file\n",
            "    return _is_static_file(file_path, _StaticFiles.all_paths)\n",
            "  File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/utils.py\", line 1204, in _is_static_file\n",
            "    if not file_path.exists():\n",
            "           ~~~~~~~~~~~~~~~~^^\n",
            "  File \"/opt/miniconda3/lib/python3.13/pathlib/_abc.py\", line 450, in exists\n",
            "    self.stat(follow_symlinks=follow_symlinks)\n",
            "    ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            "  File \"/opt/miniconda3/lib/python3.13/pathlib/_local.py\", line 515, in stat\n",
            "    return os.stat(self, follow_symlinks=follow_symlinks)\n",
            "           ~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            "OSError: [Errno 63] File name too long: 'csv\\nproduct_id,name,category,price,description,brand,stock_quantity,supplier,created_date\\nP001,Wireless Earbuds,Electronics,79.99,\"Noise-cancelling wireless earbuds with touch controls.\",\"SoundWave\",250,\"TechSupply Co.\",2023-08-15\\nP002,Men\\'s Running Shoes,Sportswear,89.99,\"Lightweight and breathable running shoes designed for comfort.\",\"FitRun\",150,\"SportyDeals\",2023-09-05\\nP003,4K Ultra HD TV,Electronics,499.99,\"55-inch 4K Ultra HD Smart LED TV with HDR.\",\"VisionMax\",80,\"HomeTech Distributors\",2023-08-20\\nP004,Coffee Maker,Home Appliances,49.99,\"Programmable coffee maker with 12-cup capacity.\",\"BrewMaster\",200,\"Kitchen Supply Inc.\",2023-07-30\\nP005,Water Bottle,Sports Equipment,19.99,\"Insulated stainless steel water bottle, keeps drinks cold for 24 hours.\",\"HydroCool\",500,\"EcoBottles\",2023-09-10\\nP006,Ergonomic Office Chair,Furniture,199.99,\"Comfortable ergonomic chair with lumbar support and adjustable height.\",\"Home Comforts\",75,\"OfficeWorks\",2023-08-28\\nP007,Smart Watch,Electronics,249.99,\"Smart watch with fitness tracking and heart rate monitor.\",\"FitTrack\",120,\"GizmoGadgets\",2023-09-12\\nP008,Yoga Mat,Sports Equipment,29.99,\"Non-slip yoga mat with extra cushioning.\",\"Zen Yoga\",350,\"Wellness Store\",2023-09-15\\nP009,Air Fryer,Home Appliances,89.99,\"Compact air fryer with multiple cooking presets.\",\"CrispyCook\",145,\"KitchenPro\",2023-08-02\\nP010,Wireless Mouse,Electronics,29.99,\"Ergonomic wireless mouse with customizable buttons.\",\"ClickTech\",300,\"Gadget World\",2023-07-25\\nP011,Spice Rack Organization Set,Home Decor,39.99,\"Rotating spice rack with 12 glass jars included.\",\"HomeChef\",210,\"OrganizeIt Co.\",2023-08-17\\nP012,Dumbbell Set,Sports Equipment,99.99,\"Adjustable dumbbell set ranging from 5 to 30 lbs.\",\"StrengthTech\",100,\"Fit Equipment\",2023-09-01\\nP013,Kids\\' Backpack,Accessories,34.99,\"Durable backpack with multiple compartments for school.\",\"KidStyle\",175,\"Backpack Haven\",2023-08-23\\nP014,Digital Camera,Electronics,399.99,\"Compact digital camera with 20 MP and full HD video.\",\"SnapShot\",60,\"Camera Boutique\",2023-09-09\\nP015,Portable Bluetooth Speaker,Electronics,59.99,\"Water-resistant Bluetooth speaker with 12 hours of playtime.\",\"SoundBox\",130,\"Audio Plus\",2023-09-14\\nP016,Electric Toothbrush,Health & Personal Care,59.99,\"Rechargeable electric toothbrush with timer and pressure sensor.\",\"DentalCare\",400,\"HealthFirst Supplies\",2023-08-30\\nP017,Tote Bag,Accessories,24.99,\"Stylish and spacious tote bag for everyday use.\",\"Chic Designs\",300,\"Fashion Hub\",2023-09-06\\nP018,Sneaker Cleaner Kit,Accessories,15.99,\"Complete shoe cleaning kit for all types of sneakers.\",\"FreshFeet\",500,\"CleanKicks\",2023-09-03\\nP019,Camping Tent,Outdoor,129.99,\"Easy setup camping tent for 4 people, weather-resistant.\",\"Outdoors Pro\",85,\"Adventure Outfitters\",2023-08-12\\nP020,LED Desk Lamp,Home Decor,39.99,\"Adjustable LED desk lamp with multiple brightness settings.\",\"BrightEase\",170,\"HomeLight Solutions\",2023-09-08\\n'\n"
          ]
        }
      ],
      "source": [
        "with gr.Blocks(\n",
        "    theme=gr.themes.Soft(\n",
        "        primary_hue=\"blue\",\n",
        "        neutral_hue=\"gray\",\n",
        "        font=[\"Inter\", \"ui-sans-serif\", \"system-ui\"]\n",
        "    ),\n",
        "    css=\"\"\"\n",
        "    .gradio-container { max-width: 1200px !important; margin: auto !important; }\n",
        "    .header { text-align: center; margin-bottom: 2em; }\n",
        "    .header h1 { color: #1f2937; font-size: 2.5em; margin-bottom: 0.5em; }\n",
        "    .header p { color: #6b7280; font-size: 1.1em; }\n",
        "    .generate-btn { background: linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%) !important; }\n",
        "    .generate-btn:hover { transform: translateY(-2px) !important; box-shadow: 0 8px 25px rgba(59, 130, 246, 0.3) !important; }\n",
        "    .stats-card { background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%); border-radius: 12px; padding: 1.5em; margin: 1em 0; }\n",
        "    \"\"\"\n",
        ") as demo:\n",
        "    \n",
        "    gr.HTML(\"\"\"\n",
        "    <div class=\"header\">\n",
        "        <h1>Synthetic Dataset Generator</h1>\n",
        "        <p>Generate realistic synthetic datasets using AI models for testing and development</p>\n",
        "    </div>\n",
        "    \"\"\")\n",
        "    \n",
        "    with gr.Row():\n",
        "        with gr.Column(scale=1):\n",
        "            gr.Markdown(\"### Configuration\")\n",
        "            \n",
        "            dataset_type = gr.Dropdown(\n",
        "                choices=[\"employees\", \"customers\", \"products\", \"transactions\"],\n",
        "                value=\"employees\",\n",
        "                label=\"Dataset Type\",\n",
        "                info=\"Choose the type of data to generate\"\n",
        "            )\n",
        "            \n",
        "            num_records = gr.Slider(\n",
        "                minimum=5, maximum=100, step=5, value=20,\n",
        "                label=\"Number of Records\",\n",
        "                info=\"How many records to generate\"\n",
        "            )\n",
        "            \n",
        "            region = gr.Dropdown(\n",
        "                choices=[\"US Only\", \"International\", \"Mixed\", \"Europe\", \"Asia\"],\n",
        "                value=\"US Only\",\n",
        "                label=\"Geographic Region\",\n",
        "                info=\"Location for addresses and phone numbers\"\n",
        "            )\n",
        "            \n",
        "            model_choice = gr.Radio(\n",
        "                choices=[\"OpenAI GPT-4o-mini\", \"Llama 3.1 8B\"],\n",
        "                value=\"OpenAI GPT-4o-mini\",\n",
        "                label=\"AI Model\",\n",
        "                info=\"Choose the AI model for generation\"\n",
        "            )\n",
        "            \n",
        "            generate_btn = gr.Button(\n",
        "                \"Generate Dataset\",\n",
        "                variant=\"primary\",\n",
        "                elem_classes=\"generate-btn\",\n",
        "                size=\"lg\"\n",
        "            )\n",
        "        \n",
        "        with gr.Column(scale=2):\n",
        "            gr.Markdown(\"### Generated Dataset\")\n",
        "            \n",
        "            status = gr.Markdown(\"Ready to generate your dataset!\")\n",
        "            \n",
        "            dataframe_output = gr.Dataframe(\n",
        "                value=pd.DataFrame(),\n",
        "                label=\"Dataset Preview\",\n",
        "                wrap=True\n",
        "            )\n",
        "            \n",
        "            with gr.Row():\n",
        "                csv_output = gr.Textbox(\n",
        "                    value=\"\",\n",
        "                    label=\"CSV Data\",\n",
        "                    lines=10,\n",
        "                    max_lines=15\n",
        "                )\n",
        "                \n",
        "                download_btn = gr.DownloadButton(\n",
        "                    \"Download CSV\",\n",
        "                    elem_id=\"download-btn\"\n",
        "                )\n",
        "    \n",
        "    generate_btn.click(\n",
        "        generate_dataset,\n",
        "        inputs=[dataset_type, num_records, region, model_choice],\n",
        "        outputs=[dataframe_output, csv_output, status]\n",
        "    )\n",
        "    \n",
        "    csv_output.change(\n",
        "        download_csv,\n",
        "        inputs=[csv_output],\n",
        "        outputs=[download_btn]\n",
        "    )\n",
        "\n",
        "demo.launch(share=True, inbrowser=True)\n"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "base",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.13.5"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}
